* Programs
** Post results to frame
cap program drop postresults
program define postresults
	* Syntax
	syntax, frame(string) selection(string) [replace] [extra(string)] [save(string)]
	tempname R b_m ll_m ul_m
	
	* Load regression results
	mat `R' = r(table)
	
	* Parse selection list
	tokenize "`selection'", parse(",")
	local createExp ""
	local postExp ""
	
	* Fill in create and post expressions
	while "`1'" != "" {
	    * Parse variable number and name
	    local varN : word 1 of `1'
		local varS : word 2 of `1'
		
		* Obtain coeff and ll/ul
		mat `b_m' = `R'["b", `varN']
		local b = `b_m'[1,1]
		mat `ll_m' = `R'["ll", `varN']
		local ll = `ll_m'[1,1]
		mat `ul_m' = `R'["ul", `varN']
		local ul = `ul_m'[1,1]
		
		* Update commands
		local createExp "`createExp' `varS'_b `varS'_ll `varS'_ul"
		local postExp "`postExp' (`b') (`ll') (`ul')"
		
		* Move on
		macro shift 2
	
	}
	
	* Manage frames
	** If it doesn't exist: create
	capture confirm frame `frame'
	if _rc != 0 {
	    frame create `frame' str100(extra) `createExp'
	}
	else {
	    if "`replace'" != "" {
		    frame drop `frame'
			frame create `frame' str100(extra) `createExp'
		}
	}
	
	frame post `frame' ("`extra'") `postExp'
	
	if "`save'" != "" {
		frame `frame': save "`save'", replace
	}
end

** Resultplot
if c(username) != "n15099" qui do "G:\Other\SSC programs\resultplot\Versions\toEdit\resultplot.do"

** Gap dummies from BLS + histograms
cap program drop dummyGenerator
program define dummyGenerator, rclass
	** Parse options
	syntax varname [aw pw iw fw], gapcount(integer) middlebinsize(real) binname(string) [show] [graph p(string) graphbinwidth(integer 1)] [gaplabel(string) xlabel(string) path(string) edge(string) noxlines] 
	local gapvar = "`varlist'"
	local gapCount = "`gapcount'"
	local middleBinSize = "`middlebinsize'"
	local binName = "`binname'"	
	
	** Technicalities
	version 15.1
	tempvar gapDummies
	if "$exampleData" == "1" replace `gapvar' = rnormal(0, 2)
		
	local gapsOneSide = (`gapCount'-1)/2
	local thresholdsOneSide = `gapsOneSide' - 1
	local middleBinSize_neg = 100 - `middleBinSize'
	local middleBinSize_pos = `middleBinSize'
		
	** Cutoffs for middle group
	_pctile `gapvar' [`weight'`exp'] if `gapvar' <= 0, percentiles(`middleBinSize_neg')
	local middleThreshold_neg = r(r1)
	_pctile `gapvar' [`weight'`exp'] if `gapvar' > 0, percentiles(`middleBinSize_pos')
	local middleThreshold_pos = r(r1)

	** Extreme group cutoffs
	_pctile `gapvar' [`weight'`exp'] if `gapvar' < `middleThreshold_neg', nquantiles(`gapsOneSide')
	//mat cutoffsNeg = r(quantiles_used)
	mat cutoffsNeg = J(`thresholdsOneSide',1,.)
	foreach ii of numlist 1/`thresholdsOneSide' {
	mat cutoffsNeg[`ii',1] = r(r`ii')
	} 
	
	_pctile `gapvar' [`weight'`exp'] if `gapvar' > `middleThreshold_pos', nquantiles(`gapsOneSide')
	//mat cutoffsPos = r(quantiles_used)
	mat cutoffsPos = J(`thresholdsOneSide',1,.)
	foreach ii of numlist 1/`thresholdsOneSide' {
		mat cutoffsPos[`ii',1] = r(r`ii')
	} 	

	** Generate variable
	qui gen `gapDummies' = 0 if inrange(`gapvar', `middleThreshold_neg', `middleThreshold_pos')		// Edges inclusive
	qui replace `gapDummies' = 1 if `gapvar' > `middleThreshold_pos' & ~missing(`gapvar')
	qui replace `gapDummies' = -1 if `gapvar' < `middleThreshold_neg'
	qui forvalues i = 1/`thresholdsOneSide' {
		local gapNumber = `i' + 1
		local rowNeg = `thresholdsOneSide' + 1 - `i'
		replace `gapDummies' = `gapNumber'  if `gapvar' > cutoffsPos[`i',1] & ~missing(`gapvar')
		replace `gapDummies' = -`gapNumber' if `gapvar' < cutoffsNeg[`rowNeg',1]
	}
	
	** Rename
	rename `gapDummies' `binName'
	gen `binName'_abs = abs(`binName')
	
	** Show tab
	if "`show'" == "show" {
		noisily tab `binName'
		noisily tab `binName'_abs
	}
	
	** Generate matrix and local of thresholds
	matrix thresholds = cutoffsNeg \ `middleThreshold_neg' \ `middleThreshold_pos' \ cutoffsPos
	foreach i of numlist 1/`=rowsof(thresholds)' {
		local thresholds `thresholds' `=thresholds[`i',1]'
		local thresholds100 `thresholds100' `=thresholds[`i',1]*100'
	}
	
	** Make bar charts
	if "`graph'" != "" {
		*preserve
		drop if missing(`gapvar')
		tempvar deviation weightvar D_P_up D_P_down
		
		* Round and bound gaps
		if "`graphbinwidth'" != "1" {
			tempvar newgap
			gen double `newgap' = `gapvar'/`graphbinwidth'
			gen double `deviation' = round(`newgap', 0.01)
			replace `deviation' = `deviation'*`graphbinwidth'
		}
		else gen double  `deviation' = round(`gapvar', 0.01)
		sum `deviation', d
		if "`edge'" == "" {
			replace `deviation' = r(p99) if `deviation' >= r(p99) & `deviation' != .
			replace `deviation' = r(p1) if `deviation' <= r(p1)
		}
		else {
		    replace `deviation' = `edge'  if `deviation' >= `edge' & `deviation' != .
			replace `deviation' = -`edge' if `deviation' <= -`edge'
		}
		
		* Calculate statistics
		** Density
		qui sum `deviation', meanonly
		gen `weightvar' = 1/_N
		if "`weight'" != "" replace `weightvar' `exp'
		tab `deviation'
		total `weightvar', over(`deviation')
		mat density = r(table)
		
		** Frequency of price change
		mean D01_`p'_h1 [aw=`weightvar'], over(`deviation')
		mat freq = r(table)
		
		gen `D_P_up' = (DL_`p'_h1 > 0 & DL_`p'_h1 != .)
		mean `D_P_up' [aw=`weightvar'], over(`deviation')
		mat freq_up = r(table)
		
		gen `D_P_down' = (DL_`p'_h1 < 0 & DL_`p'_h1 != .)
		mean `D_P_down' [aw=`weightvar'], over(`deviation')
		mat freq_down = r(table)
		
		** Size of price change, conditional on changeH
		mean DL_`p'_h1 [aw=`weightvar'] if DL_`p'_h1 != 0, over(`deviation')
		mat size = r(table)
		
		mean DL_`p'_h1 [aw=`weightvar'] if (DL_`p'_h1 > 0 & DL_`p'_h1 != .), over(`deviation')
		mat size_up = r(table)
		mean DL_`p'_h1 [aw=`weightvar'] if (DL_`p'_h1 < 0 & DL_`p'_h1 != .), over(`deviation')
		mat size_down = r(table)

		** Get means and SEs into dataset
		local variables = "density freq freq_up freq_down size size_up size_down"
		foreach var of local variables {
			tempfile results`var'
			clear
			mat `var'_t = `var'' 
			xsvmat, from(`var'_t) names(col) fast rownames(`deviation')
			rename * `var'_* 
			drop `var'_t `var'_pvalue `var'_se `var'_df `var'_crit `var'_eform
			rename (`var'_b `var'_ll `var'_ul) (`var'_mean `var'_ll `var'_ul)
			save `results`var''
		} 
		
		clear
		gen `deviation' = ""
		foreach var of local variables {
			merge 1:1 `deviation' using `results`var'', nogen
		}
		
		drop if real(`deviation') == .
		destring `deviation', replace

		foreach var of varlist * {
		    replace `var' = `var' * 100
		}
		
		** Graph
		destring `deviation', replace
		label var `deviation' "`gaplabel'"
		gen deviationCopy = `deviation'
		save "`path'/dispersionData${sample}", replace
		
		if "`xlines'" != "noxlines" local xlineCode "xline(`thresholds100', lcolor(black%25) lpattern(dash))"
		foreach var of local variables {
			twoway (bar `var'_mean `deviation', barwidth(`graphbinwidth') fcolor(black%50) lcolor(white)) /*(rcap `var'_ll `var'_ul `deviation', lcolor(black%80)) */, `xlineCode' legend(off) graphregion(color(white)) ylabel(, glcolor(black%5)) xlabel(`xlabel') ytitle("in %") scale(1.4)	// [*]
			graph export 	"`path'/`gapvar'_`var'${sample}.png", replace width(1600)
			graph export 	"`path'/`gapvar'_`var'${sample}.pdf", replace
			graph save 		"`path'/`gapvar'_`var'${sample}.gph", replace
		}
		*restore
	}
	
	** Return matrix and local of thresholds
	noisily matrix list thresholds
	return matrix thresholds = thresholds
end
** statnoself
cap program drop statnoself
program define statnoself, byable(onecall)
	syntax varlist
	local x  "`varlist'"
	tempvar mean count ssd ssd_contrib ssdS
	by `_byvars': gegen `mean' = mean(`x')
	by `_byvars': gegen `count' = count(`x')
		
	gen `ssd_contrib' = (`x' - `mean')^2
	by `_byvars': egen `ssd' = total(`ssd_contrib')
		
	gen meanS = (`count' * `mean' - `x')/(`count' - 1)
	gen `ssdS' = `ssd' - (`x' - `mean') * (`x' - meanS)
	gen sdS = sqrt(`ssdS' / (`count' - 1 - 1))
end

cap program drop graphsave3
program define graphsave3
	graph save "`0'", replace
	graph export "`0'.pdf", replace
	graph export "`0'.png", width(1600) replace
end

** addDiff
cap program drop addDiff
program define addDiff
	syntax, ivar(varname) cvar(varname) ilevels(numlist)
	foreach i of local ilevels {
		lincom _b[`i'.`ivar'#c.`cvar'] - _b[1.`ivar'#c.`cvar']
		estadd scalar diff_`i'_b = round(r(estimate), 0.001)
		estadd scalar diff_`i'_p = round(r(p), 0.001)
	}
end

** Gap dummies
cap program drop genGapDummies
program define genGapDummies
	* Parse syntax
	syntax varname, n(integer) suffix(string) [abs] [gen]
	local var = "`varlist'"
	local suffix = "`n'" + "`suffix'"
	noisily di "Generating variable gapDummies_`suffix', which has `n' quantiles per sign of `var'"
	
	* Generate dummies
	tempvar posDummy negDummy
	tempvar flippedVar
	gen `flippedVar' = - `var'
	fasterxtile `posDummy' = `var' if `var' > 0, nquantiles(`n')
	fasterxtile `negDummy' = `flippedVar' if `var' <= 0, nquantiles(`n')
	gen gapDummies`suffix' = cond(`posDummy' != ., `posDummy', 1000+`negDummy')

	* Add label
	label define gapDummies`suffix' 0 "Error if you see this", replace
	cap noisily label save gapDummies`suffix' using "$path_dofiles/Labels/label_gapDummies`suffix'", replace
	forvalues i = 1/`n' {
		local labelP = "Gap`suffix': " + `i'*"+"
		local labelN = "Gap`suffix': " + `i'*"-"
		label define gapDummies`suffix' `i' "`labelP'" `=1000+`i'' "`labelN'", add
	}
	
	label values gapDummies`suffix' gapDummies`suffix'
	if "`gen'" != "" {
		tab gapDummies`suffix', gen(gapDummies`suffix'_)
		drop gapDummies`suffix'_1
	}
	bysort gapDummies`suffix': sum `var'

	* Absolute gap dummies
	if "`abs'" != "" {
		gen absGapDummies`suffix' = abs(gapDummies`suffix') - 1000*(gapDummies`suffix' > 1000)
		
		label define absGapDummies`suffix' 0 "Error if you see this", replace
		cap noisily label save absGapDummies`suffix' using "$path_dofiles/Labels/label_absGapDummies`suffix'", replace
		forvalues i = 1/`n' {
			local labelA = "Gap`suffix'A: " + `i'*"-" + "/" + `i'*"+"
			label define absGapDummies`suffix' `i' "`labelA'", add
		}
		label values absGapDummies`suffix' absGapDummies`suffix'
		if "`gen'" != "" {
			tab absGapDummies`suffix', gen(absGapDummies`suffix'_)
			drop absGapDummies`suffix'_1
		}
	}
end

** Datetime adding log
cap program drop log2
program define log2
	capture noisily log `0'
	if _rc == 608 {
		di as error "Retrying with datetime added"
		local date = subinstr("`c(current_date)'", " ", "", .)
		local time = subinstr("`c(current_time)'", ":", "", .)
		local datetime = "`date'_`time'"
		local newInput = subinstr(`"`0'"', ".log", "_`datetime'.log", 1)
		local newInput = subinstr(`"`newInput'"', ".smcl", "_`datetime'.smcl", 1)
		log `newInput'
	}
end

** VCE twoway clustering
cap program drop vcecluster
cap program drop vcecluster_mlMulti
program define vcecluster
	* Parse Syntax
	di as result "vcecluster"
	syntax, cluster1(passthru) cluster2(passthru) [force]
	local regularSingleEquationModels "regress"
	local mlSingleEquationModels ""
	local mlMultiEquationModels "heckman"
	
	* Identify model that preceeded
	if inlist("`regularSingleEquationModels'", "`e(cmd)'") vcecluster_regSingle, `cluster1' `cluster2' `force' 	// Single Equation Models
	else if inlist("`mlSingleEquationModels'", "`e(cmd)'") vcecluster_mlSingle, `cluster1' `cluster2' `force'	// ML Single Equation Models
	else if inlist("`mlMultiEquationModels'", "`e(cmd)'") vcecluster_mlMulti, `cluster1' `cluster2' `force'		// ML Multi Equation Models
	else {
		di as result "`e(cmd)'" as text " does not appear to be in the accepted list of models: "
		di _col(4) as text "Regular Single Equation Models: " as result "`regularSingleEquationModels'"
		di _col(4) as text "ML Single Equation Models: " as result "`mlSingleEquationModels'"
		di _col(4) as text "ML Multiple Equation Models: " as result "`mlMultiEquationModels'"
		
		if "`force'" == "" di as text "Specify" as result " force" as text " to force the command to still do things for you."
		else if "`force'" == "force" {
			di _newline as text "Would you like to force the program to treat your model as one of those types?"
			di as text "If so, type either regSingle, mlSingle or mlMulti", request(forcedType)
			if inlist("`forcedType'", regSingle, mlSingle, mlMulti) vcecluster_`forcedType'
			else {
				di _newline as text "User did not enter regSingle, mlSingle or mlMulti. Exiting program here."
				error 666
			}
		}
	}
end

program define vcecluster_mlMulti, eclass
	* Parse Syntax
	syntax, cluster1(varname) cluster2(varname) [force]
	
	* Error checking
	** There should not be any missings in the cluster variables
	*** Cluster 1
	qui count if e(sample) == 1 & `cluster1' == .
	if r(N) > 0 {
		di as text "There are missings in the first cluster variable (" as result "`cluster1'" as text ")."
		if "`force'" == "" error 666
	}
	
	*** Cluster 2
	qui count if e(sample) == 1 & `cluster2' == .
	if r(N) > 0 {
		di as text "There are missings in the second cluster variable (" as result "`cluster2'" as text ")."
		if "`force'" == "" error 666
	}
	
	** The provided covariance matrix should be based on oim standard errors (no robust/cluster)
	if e(vce) != "oim" {
		di "The provided covariance matrix should be based on oim standard errors (no robust/cluster)."
		if "`force'" == "" error 666
	}
	
	* Generate combi-var
	** Assign tempvar
	tempvar cluster12
	
	** Use gtools if available
	cap which gtools
	if _rc == 0 		gegen `cluster12' = group(`cluster1' `cluster2')
	else if _rc == 111	 egen `cluster12' = group(`cluster1' `cluster2')
	else {
		if "`force'" == "" error 666
	}
	
	* Calculate variance matrices
	** Store oim variance matrix 3x
	tempname D1 D2 D12 V_2wc
	mat `D1' = e(V)
	mat `D2' = e(V)
	mat `D12' = e(V)
	
	** Predict scores
	predict vcecluster_scores_*, scores
	
	** Obtain weight statement
	if "`e(wtype)'" != "" local weightExp "[`e(wtype)' `e(wexp)']"
	
	
	** Let Stata calculate the cluster-robust variance matrices
	_robust vcecluster_scores_* `weightExp' if e(sample) == 1, v(`D1') cluster(`cluster1')
	_robust vcecluster_scores_* `weightExp' if e(sample) == 1, v(`D2') cluster(`cluster2')
	_robust vcecluster_scores_* `weightExp' if e(sample) == 1, v(`D12') cluster(`cluster12')
	drop vcecluster_scores_*
	
	** Calculate twoway clustered variance matrix
	mat `V_2wc' = `D1' + `D2' - `D12'
	
	** Post resylts
	mata: fixMatrix("`V_2wc'")
	ereturn repost V = `V_2wc'
	ereturn display
end

cap mata: mata drop fixMatrix()
mata: 
	//mata drop fixMatrix()
	void fixMatrix(string scalar stataMatrix) {
		real matrix original, U, l, newMatrix
		real scalar i
		original = st_matrix(stataMatrix)
		symeigensystem(original, U, l)
		
		for(i=1;i<=cols(l);i++){
			l[i] = max((l[i], 0))
		}
		
		newMatrix = U*diag(l)*U'
		
		st_replacematrix(stataMatrix, newMatrix)
	}
end

** Heckamn with 2-way clustered SE
cap program drop heckman2
program define heckman2
	* Parse syntax
	gettoken heckmanCommand clusters : 0, parse(";")
	local heckmanCommand = `"heckman `heckmanCommand'"'
	local clusters = subinstr("`clusters'", ";", "", .)
	gettoken cluster1 cluster2: clusters, parse(" ")
	local cluster1 = subinstr("`cluster1'", " ", "", .)
	local cluster2 = subinstr("`cluster2'", " ", "", .)
	
	* Show parsing outcome
	di as result "Heckman2"
	di _skip(3) as text "Regression command: " 	_col(24) as result `"`heckmanCommand'"'
	di _skip(3) as text "Cluster 1: " 			_col(24) as result "`cluster1'"
	di _skip(3) as text "Cluster 2: " 			_col(24) as result "`cluster2'"
	
	* Do Heckmans
	tempname v_1 v_2 v_12 v_twoway
	tempvar cluster12
	
	** Cluster 1
	`heckmanCommand' cluster(`cluster1')
	mat `v_1' = e(V)
	
	** Cluster 2
	`heckmanCommand' cluster(`cluster2')
	mat `v_2' = e(V)
	
	** Cluster 12
	egen `cluster12' = group(`cluster1' `cluster2')
	`heckmanCommand' cluster(`cluster12')
	mat `v_12' = e(V)
	
	** Cluster twoway
	mat `v_twoway' = `v_1' + `v_2' - `v_12'
	sete, newmat(`v_twoway') toreplace(V)
	
	di _newline "Results with twoway clustered standard errors."
	ereturn display
	
	** Warning
	di "Warning: number of clusters and clustervar are wrong (result of 1#2)."
end

** E-program to replace e-matrices
cap program drop sete
program define sete, eclass
	syntax, newmat(string) toreplace(string)
	
	mat old_`toreplace' = e(`toreplace')		// Store old e-matrix
	ereturn repost `toreplace' = `newmat'		// Insert new e-matrix
	mat `newmat' = e(`toreplace')				// Retrieve new e-matrix in initial name (it is destroyed by ereturn repost)
end

** Make Category Numeric
cap program drop makeCategoryNumeric2
program define makeCategoryNumeric2	// This one should be faster
	gen category = .
	replace category = cond( categoryS =="beer", 1, ///
	cond( categoryS =="blades", 2,	 ///
	cond( categoryS =="carbbev", 3,	 ///
	cond( categoryS =="cigets", 4,	 ///
	cond( categoryS =="coffee", 5,	 ///
	cond( categoryS =="coldcer", 6,	 ///
	cond( categoryS =="deod", 7,	 ///
	cond( categoryS =="diapers", 8,	 ///
	cond( categoryS =="factiss", 9,	 ///
	cond( categoryS =="fzdinent", 10, ///
	cond( categoryS =="fzpizza", 11, ///
	cond( categoryS =="hhclean", 12, ///
	cond( categoryS =="hotdog", 13, ///
	cond( categoryS =="laundet", 14, ///
	cond( categoryS =="margbutr", 15, ///
	cond( categoryS =="mayo", 16, ///
	cond( categoryS =="milk", 17, ///
	cond( categoryS =="mustketc", 18, ///
	cond( categoryS =="paptowl", 19, ///
	cond( categoryS =="peanbutr", 20, ///
	cond( categoryS =="photo", 21, ///
	cond( categoryS =="razors", 22, ///
	cond( categoryS =="saltsnck", 23, ///
	cond( categoryS =="shamp", 24, ///
	cond( categoryS =="soup", 25, ///
	cond( categoryS =="spagsauc", 26, ///
	cond( categoryS =="sugarsub", 27, ///
	cond( categoryS =="toitisu", 28, ///
	cond( categoryS =="toothbr", 29, ///
	cond( categoryS =="toothpa", 30, ///
	cond( categoryS =="yogurt", 31, 999)))))))))))))))))))))))))))))))
	drop categoryS
end

cap program drop makeCategoryNumeric
program define makeCategoryNumeric
	gen category = .
	replace category = 1 if categoryS =="beer"
	replace category = 2 if categoryS =="blades"
	replace category = 3 if categoryS =="carbbev"
	replace category = 4 if categoryS =="cigets"
	replace category = 5 if categoryS =="coffee"
	replace category = 6 if categoryS =="coldcer"
	replace category = 7 if categoryS =="deod"
	replace category = 8 if categoryS =="diapers"
	replace category = 9 if categoryS =="factiss"
	replace category = 10 if categoryS =="fzdinent"
	replace category = 11 if categoryS =="fzpizza"
	replace category = 12 if categoryS =="hhclean"
	replace category = 13 if categoryS =="hotdog"
	replace category = 14 if categoryS =="laundet"
	replace category = 15 if categoryS =="margbutr"
	replace category = 16 if categoryS =="mayo"
	replace category = 17 if categoryS =="milk"
	replace category = 18 if categoryS =="mustketc"
	replace category = 19 if categoryS =="paptowl"
	replace category = 20 if categoryS =="peanbutr"
	replace category = 21 if categoryS =="photo"
	replace category = 22 if categoryS =="razors"
	replace category = 23 if categoryS =="saltsnck"
	replace category = 24 if categoryS =="shamp"
	replace category = 25 if categoryS =="soup"
	replace category = 26 if categoryS =="spagsauc"
	replace category = 27 if categoryS =="sugarsub"
	replace category = 28 if categoryS =="toitisu"
	replace category = 29 if categoryS =="toothbr"
	replace category = 30 if categoryS =="toothpa"
	replace category = 31 if categoryS =="yogurt"
	drop categoryS
end

** Cluster sample
cap program drop csample
program define csample
	* Syntax
	** Parse
	syntax anything(name=percentageToKeep id="Percentage/Number To Keep"), cluster(varlist) [count] [fast]
	if "`count'" == "count" local numberToKeep = `percentageToKeep'
	
	** Check
	if `percentageToKeep' >= 1 & "`count'" == "" local percentageToKeep = `percentageToKeep'/100
	
	
	** Report
	if "`count'" == "count" di "Retaining `numberToKeep' clusters of `cluster'"
	if "`count'" != "count" di "Retaining `=`percentageToKeep'*100' percent of the `cluster' clusters"
	
	* Draw clusters
	** Percent
	if "`count'" == "" {
		tempvar dice n
		if "`fast'" == "" gen `n' = _n		// Save initial sort order
		
		bysort `cluster': gen `dice' = runiform() if _n == 1
		bysort `cluster': replace `dice' = `dice'[1]
		
		* Reduce to sample
		keep if `dice' < `percentageToKeep'
		if "`fast'" == "" sort `n'			// Restore initial sort order
	}
	
	** Count
	if "`count'" == "count" {
		tempvar dice n n_temp diceOrder
		if "`fast'" == "" gen `n' = _n		// Save initial sort order
		
		bysort `cluster': gen `dice' = runiform() if _n == 1
		gen `n_temp' = _n
		
		sort `dice'
		gen `diceOrder' = _n if ~missing(`dice')
		
		bysort `cluster' (`n_temp'): replace `diceOrder' = `diceOrder'[1]
		
		* Reduce to sample
		keep if `diceOrder' <= `numberToKeep'
		if "`fast'" == "" sort `n'			// Restore initial sort order
	}
	
end

** Retain indicator
cap program drop generateRetainIndicator
program define generateRetainIndicator
	bysort id_nr 		: gen end = month[_N]
	bysort id_nr 		: gen start = month[1]
	gen endYear = yofd(dofm(end))
	gen startYear = yofd(dofm(start))
	gen startMonth = month(dofm(start))
	gen endMonth = month(dofm(end))

	gen retain = (year < endYear	& year > startYear)														// Retain if ... the product started selling before the current year and stopped after	// "the middle"
	replace retain = 1 if year == endYear				& endMonth 	== 12	& year > startYear 					// ... the product started selling in January this year									// "the tail"
	replace retain = 1 if year == startYear			  	& startMonth 	== 1	& year < endYear				// ... the product stopped selling in December this year								// "the head"
	replace retain = 1 if endYear == startYear 			& startMonth 	== 1	& endMonth == 12
end		

* Labels (clear removes value labels, so more convenient to define program and call it as required)
** Category
cap program drop defineCategoryLabel
program define defineCategoryLabel
	cap label drop categoryLabel
	label define categoryLabel 1 `"beer"', modify
	label define categoryLabel 2 `"blades"', modify
	label define categoryLabel 3 `"carbbev"', modify
	label define categoryLabel 4 `"cigets"', modify
	label define categoryLabel 5 `"coffee"', modify
	label define categoryLabel 6 `"coldcer"', modify
	label define categoryLabel 7 `"deod"', modify
	label define categoryLabel 8 `"diapers"', modify
	label define categoryLabel 9 `"factiss"', modify
	label define categoryLabel 10 `"fzdinent"', modify
	label define categoryLabel 11 `"fzpizza"', modify
	label define categoryLabel 12 `"hhclean"', modify
	label define categoryLabel 13 `"hotdog"', modify
	label define categoryLabel 14 `"laundet"', modify
	label define categoryLabel 15 `"margbutr"', modify
	label define categoryLabel 16 `"mayo"', modify
	label define categoryLabel 17 `"milk"', modify
	label define categoryLabel 18 `"mustketc"', modify
	label define categoryLabel 19 `"paptowl"', modify
	label define categoryLabel 20 `"peanbutr"', modify
	label define categoryLabel 21 `"photo"', modify
	label define categoryLabel 22 `"razors"', modify
	label define categoryLabel 23 `"saltsnck"', modify
	label define categoryLabel 24 `"shamp"', modify
	label define categoryLabel 25 `"soup"', modify
	label define categoryLabel 26 `"spagsauc"', modify
	label define categoryLabel 27 `"sugarsub"', modify
	label define categoryLabel 28 `"toitisu"', modify
	label define categoryLabel 29 `"toothbr"', modify
	label define categoryLabel 30 `"toothpa"', modify
	label define categoryLabel 31 `"yogurt"', modify
end

cap program drop encodeCategories
program define encodeCategories
	gen categoryId = .
	replace categoryId = 1 if category == "beer"
	replace categoryId = 2 if category == "blades"
	replace categoryId = 3 if category == "carbbev"
	replace categoryId = 4 if category == "cigets"
	replace categoryId = 5 if category == "coffee"
	replace categoryId = 6 if category == "coldcer"
	replace categoryId = 7 if category == "deod"
	replace categoryId = 8 if category == "diapers"
	replace categoryId = 9 if category == "factiss"
	replace categoryId = 10 if category == "fzdinent"
	replace categoryId = 11 if category == "fzpizza"
	replace categoryId = 12 if category == "hhclean"
	replace categoryId = 13 if category == "hotdog"
	replace categoryId = 14 if category == "laundet"
	replace categoryId = 15 if category == "margbutr"
	replace categoryId = 16 if category == "mayo"
	replace categoryId = 17 if category == "milk"
	replace categoryId = 18 if category == "mustketc"
	replace categoryId = 19 if category == "paptowl"
	replace categoryId = 20 if category == "peanbutr"
	replace categoryId = 21 if category == "photo"
	replace categoryId = 22 if category == "razors"
	replace categoryId = 23 if category == "saltsnck"
	replace categoryId = 24 if category == "shamp"
	replace categoryId = 25 if category == "soup"
	replace categoryId = 26 if category == "spagsauc"
	replace categoryId = 27 if category == "sugarsub"
	replace categoryId = 28 if category == "toitisu"
	replace categoryId = 29 if category == "toothbr"
	replace categoryId = 30 if category == "toothpa"
	replace categoryId = 31 if category == "yogurt"
	
	drop category
	rename categoryId category
end